# Load libraries
library(Seurat)
## Loading required package: SeuratObject
## Loading required package: sp
##
## Attaching package: 'SeuratObject'
## The following objects are masked from 'package:base':
##
## intersect, t
library(ggplot2)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
#Load Data
Gruel= readRDS("/home/caminors/Desktop/sc_RNAseq/Raw_seurats/Gruel.rds")
SARC= readRDS("/home/caminors/Desktop/sc_RNAseq/Raw_seurats/SARC_demultiplexed.rds")
DefaultAssay(Gruel)="RNA"
DefaultAssay(SARC)="RNA"
## calculate mt percent
Gruel[["percent.mt"]] <- PercentageFeatureSet(Gruel, pattern = "^MT-", assay = "RNA")
SARC[["percent.mt"]] <- PercentageFeatureSet(SARC, pattern = "^MT-", assay = "RNA")
library(gridExtra)
##
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
##
## combine
# plotting
# Create the individual plots
plot1 <- ggplot(SARC@meta.data, aes(x=SARC$nCount_RNA, fill=SARC$orig.ident)) +
geom_density(alpha = 0.2) +
scale_x_log10() +
theme_classic() +
ylab("Cell density") +
geom_vline(xintercept = 500)
plot2 <- ggplot(SARC@meta.data, aes(x=SARC$nFeature_RNA, fill=SARC$orig.ident)) +
geom_density(alpha = 0.2) +
scale_x_log10() +
theme_classic() +
ylab("Cell density") +
geom_vline(xintercept = 500)
plot3 <- ggplot(Gruel@meta.data, aes(x=Gruel$nCount_RNA, fill=Gruel$orig.ident)) +
geom_density(alpha = 0.2) +
scale_x_log10() +
theme_classic() +
ylab("Cell density") +
geom_vline(xintercept = 500)
plot4 <- ggplot(Gruel@meta.data, aes(x=Gruel$nFeature_RNA, fill=Gruel$orig.ident)) +
geom_density(alpha = 0.2) +
scale_x_log10() +
theme_classic() +
ylab("Cell density") +
geom_vline(xintercept = 500)
# Arrange the plots in a 2x2 grid
grid.arrange(plot1, plot2, plot3, plot4, ncol = 2)
library(ggplot2)
library(dplyr)
SARC@meta.data %>%
ggplot(aes(x=nCount_RNA, y=nFeature_RNA, color=percent.mt)) +
geom_point() +
scale_colour_gradient(low = "gray90", high = "black") +
stat_smooth(method=lm) +
scale_x_log10() +
scale_y_log10() +
theme_classic() +
geom_vline(xintercept = 500) +
geom_hline(yintercept = 300) +
facet_wrap(~orig.ident)
## `geom_smooth()` using formula = 'y ~ x'
library(dplyr)
library(ggplot2)
# Extract metadata from Seurat object
meta_data <- Gruel@meta.data
# Calculate quantiles and classify cells
meta_data <- meta_data %>%
group_by(orig.ident) %>%
mutate(
nCount_status = case_when(
nCount_RNA > quantile(nCount_RNA, 0.99) ~ "Upper Outlier",
nCount_RNA < quantile(nCount_RNA, 0.01) ~ "Lower Outlier",
TRUE ~ "Within Range"
),
nFeature_status = case_when(
nFeature_RNA > quantile(nFeature_RNA, 0.99) ~ "Upper Outlier",
nFeature_RNA < quantile(nFeature_RNA, 0.01) ~ "Lower Outlier",
TRUE ~ "Within Range"
)
) %>%
ungroup()
Gruel@meta.data$nCount_status = meta_data$nCount_status
Gruel@meta.data$nFeature_status = meta_data$nFeature_status
table(Gruel$nCount_status)
##
## Lower Outlier Upper Outlier Within Range
## 1694 1709 166165
table(Gruel$nFeature_status)
##
## Lower Outlier Upper Outlier Within Range
## 1661 1709 166198
# Violin plot for nCount_RNA with color coding for outliers
Seurat::VlnPlot(Gruel, features = c("nCount_RNA"), group.by = "orig.ident", fill.by = "nCount_status", pt.size=0) +
geom_jitter(aes(color = Gruel@meta.data$nCount_status), size = 0.5, width = 0.2) +
scale_color_manual(values = c("Upper Outlier" = "red", "Lower Outlier" = "blue", "Within Range" = "black")) +
labs(title = "nCount_RNA Distribution by orig.ident with Outlier Highlights")
## Rasterizing points since number of points exceeds 100,000.
## To disable this behavior set `raster=FALSE`
# Violin plot for nFeature_RNA with color coding for outliers
VlnPlot(Gruel, features = c("nFeature_RNA"), group.by = "orig.ident", pt.size = 0) +
geom_jitter(aes(color = Gruel@meta.data$nFeature_status), size = 0.5, width = 0.2) +
scale_color_manual(values = c("Upper Outlier" = "red", "Lower Outlier" = "blue", "Within Range" = "black")) +
labs(title = "nFeature_RNA Distribution by orig.ident with Outlier Highlights")
## Rasterizing points since number of points exceeds 100,000.
## To disable this behavior set `raster=FALSE`
library(dplyr)
# Extract metadata from Seurat object
meta_data <- SARC@meta.data
# Calculate quantiles and classify cells
meta_data <- meta_data %>%
group_by(orig.ident) %>%
mutate(
nCount_status = case_when(
nCount_RNA > quantile(nCount_RNA, 0.99) ~ "Upper Outlier",
nCount_RNA < quantile(nCount_RNA, 0.01) ~ "Lower Outlier",
TRUE ~ "Within Range"
),
nFeature_status = case_when(
nFeature_RNA > quantile(nFeature_RNA, 0.99) ~ "Upper Outlier",
nFeature_RNA < quantile(nFeature_RNA, 0.01) ~ "Lower Outlier",
TRUE ~ "Within Range"
)
) %>%
ungroup()
SARC@meta.data$nCount_status = meta_data$nCount_status
SARC@meta.data$nFeature_status = meta_data$nFeature_status
table(SARC$nCount_status)
##
## Lower Outlier Upper Outlier Within Range
## 779 784 76396
table(SARC$nFeature_status)
##
## Lower Outlier Upper Outlier Within Range
## 779 783 76397
# Violin plot for nCount_RNA with color coding for outliers
Seurat::VlnPlot(SARC, features = c("nCount_RNA"), group.by = "orig.ident", fill.by = "nCount_status", pt.size=0) +
geom_jitter(aes(color = SARC@meta.data$nCount_status), size = 0.5, width = 0.2) +
scale_color_manual(values = c("Upper Outlier" = "red", "Lower Outlier" = "blue", "Within Range" = "black")) +
labs(title = "nCount Distribution by orig.ident with Outlier Highlights")
# Violin plot for nFeature_RNA with color coding for outliers
VlnPlot(SARC, features = c("nFeature_RNA"), group.by = "orig.ident", pt.size = 0) +
geom_jitter(aes(color = SARC@meta.data$nFeature_status), size = 0.5, width = 0.2) +
scale_color_manual(values = c("Upper Outlier" = "red", "Lower Outlier" = "blue", "Within Range" = "black")) +
labs(title = "nFeature Distribution by orig.ident with Outlier Highlights")
SARC@meta.data %>%
ggplot(aes(x = nCount_RNA, y = nFeature_RNA, color = nFeature_status)) +
geom_point() +
scale_colour_manual(values = c("Upper Outlier" = "red",
"Lower Outlier" = "blue",
"Within Range" = "grey")) +
stat_smooth(method = "lm") +
scale_x_log10() +
scale_y_log10() +
theme_classic() +
geom_vline(xintercept = 500) +
geom_hline(yintercept = 300) +
facet_wrap(~orig.ident)
## `geom_smooth()` using formula = 'y ~ x'
p1 =VlnPlot(Gruel, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3, group.by = "orig.ident") + ggtitle("Unfiltered Gruel")
## Rasterizing points since number of points exceeds 100,000.
## To disable this behavior set `raster=FALSE`
## Rasterizing points since number of points exceeds 100,000.
## To disable this behavior set `raster=FALSE`
## Rasterizing points since number of points exceeds 100,000.
## To disable this behavior set `raster=FALSE`
p2 = VlnPlot(SARC, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3, group.by = "orig.ident")+ ggtitle("Unfiltered SARC (tumor and PDSC)")
Filtering by Quantile and percent.mt <20
Gruel_f <- subset(Gruel, subset = nCount_status == "Within Range" & nFeature_RNA >300 & nFeature_status =="Within Range" & percent.mt < 20) ## setting these thresholds as they seem the best for SARCOMA dataset :) most nFeature_RNA <300 are actually not "within Range" but just in case
SARC_f <- subset(SARC, subset = nCount_status == "Within Range" & nFeature_RNA >300 & nFeature_status =="Within Range" & percent.mt < 20)
VlnPlot(Gruel_f, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3)
## Rasterizing points since number of points exceeds 100,000.
## To disable this behavior set `raster=FALSE`
## Rasterizing points since number of points exceeds 100,000.
## To disable this behavior set `raster=FALSE`
## Rasterizing points since number of points exceeds 100,000.
## To disable this behavior set `raster=FALSE`
VlnPlot(SARC_f, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3, group.by="orig.ident")
# Load the required package
library(gridExtra)
# Create the individual plots
plot1 <- ggplot(SARC_f@meta.data, aes(x=SARC_f$nCount_RNA, fill=SARC_f$orig.ident)) +
geom_density(alpha = 0.2) +
scale_x_log10() +
theme_classic() +
ylab("Cell density") +
geom_vline(xintercept = 500)
plot2 <- ggplot(SARC_f@meta.data, aes(x=SARC_f$nFeature_RNA, fill=SARC_f$orig.ident)) +
geom_density(alpha = 0.2) +
scale_x_log10() +
theme_classic() +
ylab("Cell density") +
geom_vline(xintercept = 500)
plot3 <- ggplot(Gruel_f@meta.data, aes(x=Gruel_f$nCount_RNA, fill=Gruel_f$orig.ident)) +
geom_density(alpha = 0.2) +
scale_x_log10() +
theme_classic() +
ylab("Cell density") +
geom_vline(xintercept = 500)
plot4 <- ggplot(Gruel_f@meta.data, aes(x=Gruel_f$nFeature_RNA, fill=Gruel_f$orig.ident)) +
geom_density(alpha = 0.2) +
scale_x_log10() +
theme_classic() +
ylab("Cell density") +
geom_vline(xintercept = 500)
# Arrange the plots in a 2x2 grid
grid.arrange(plot1, plot2, plot3, plot4, ncol = 2)
SARC_f@meta.data %>%
ggplot(aes(x=nCount_RNA, y=nFeature_RNA, color=percent.mt)) +
geom_point() +
scale_colour_gradient(low = "gray90", high = "black") +
stat_smooth(method=lm) +
scale_x_log10() +
scale_y_log10() +
theme_classic() +
geom_vline(xintercept = 500) +
geom_hline(yintercept = 300) +
facet_wrap(~orig.ident)
## `geom_smooth()` using formula = 'y ~ x'